In [1]:
#NumPy is a Python library used to perform a wide variety of mathematical operations on arrays.
import numpy as np
#Pandas is a Python library used to analyze big data and draw conclusions based on statistical theories.
import pandas as pd
#matplotlib.pyplot contains functions that make matplotlib work like MATLAB;
#each pyplot function makes some change to the current figure.
import matplotlib.pyplot as plt
#plotly.express contains functions that can create Entire Figures.
import plotly.express as px
import plotly.graph_objects as go
#init_notebook_mode prepares our Jupyter Notebook to display Plotly graphs right within the notebook itself.
from plotly.offline import iplot, plot, init_notebook_mode
from plotly.subplots import make_subplots
#%matplotlib inline makes any Matplotlib plots to automatically be embedded within the notebook, making it easier to view and analyze them as we work.
%matplotlib inline
from wordcloud import WordCloud , ImageColorGenerator
import seaborn as sns
Importing the Dataset.¶
In [3]:
# Load the raw dataset into a DataFrame.
# The CSV location can be overridden with the INDIAN_FOOD_CSV environment variable so the
# notebook also runs on machines where the original absolute Windows path does not exist.
# When the variable is not set, the original path is used unchanged.
import os

DATA_PATH = os.environ.get(
    "INDIAN_FOOD_CSV",
    "C:\\Users\\Admin\\Desktop\\Madhu\\Anaconda-Jupyter\\Indian Food Analysis Jupyter Project\\Raw Data.csv",
)
Data = pd.read_csv(DATA_PATH)
In [4]:
# Preview the first five rows of the dataset.
Data.head(5)
Out[4]:
| Name | Ingredients | Diet | Preparation_Time | Cooking_Time | Flavor | Course_Name | State | Region | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Balu shahi | Maida flour, yogurt, oil, sugar | Vegetarian | 45 | 25 | Sweet | Dessert | West Bengal | East |
| 1 | Boondi | Gram flour, ghee, sugar | Vegetarian | 80 | 30 | Sweet | Dessert | Rajasthan | West |
| 2 | Gajar ka halwa | Carrots, milk, sugar, ghee, cashews, raisins | Vegetarian | 15 | 60 | Sweet | Dessert | Punjab | North |
| 3 | Ghevar | Flour, ghee, kewra, milk, clarified butter, su... | Vegetarian | 15 | 30 | Sweet | Dessert | Rajasthan | West |
| 4 | Gulab jamun | Milk powder, plain flour, baking powder, ghee,... | Vegetarian | 15 | 40 | Sweet | Dessert | West Bengal | East |
In [5]:
# Preview the last five rows of the dataset.
Data.tail(5)
Out[5]:
| Name | Ingredients | Diet | Preparation_Time | Cooking_Time | Flavor | Course_Name | State | Region | |
|---|---|---|---|---|---|---|---|---|---|
| 250 | Til Pitha | Glutinous rice, black sesame seeds, gur | Vegetarian | 5 | 30 | Sweet | Dessert | Assam | North East |
| 251 | Bebinca | Coconut milk, egg yolks, clarified butter, all... | Vegetarian | 20 | 60 | Sweet | Dessert | Goa | West |
| 252 | Shufta | Cottage cheese, dry dates, dried rose petals, ... | Vegetarian | 20 | 45 | Sweet | Dessert | Jammu & Kashmir | North |
| 253 | Mawa Bati | Milk powder, dry fruits, arrowroot powder, all... | Vegetarian | 20 | 45 | Sweet | Dessert | Madhya Pradesh | Central |
| 254 | Pinaca | Brown rice, fennel seeds, grated coconut, blac... | Vegetarian | 20 | 45 | Sweet | Dessert | Goa | West |
In [6]:
# Peek at five randomly chosen rows.
# A fixed random_state keeps the displayed sample identical on every
# "Restart Kernel -> Run All", so the saved output stays reproducible.
Data.sample(n=5, random_state=42)
Out[6]:
| Name | Ingredients | Diet | Preparation_Time | Cooking_Time | Flavor | Course_Name | State | Region | |
|---|---|---|---|---|---|---|---|---|---|
| 23 | Cham cham | Flour, cream, sugar, saffron, lemon juice, coc... | Vegetarian | 40 | 60 | Sweet | Dessert | West Bengal | East |
| 215 | Thepla | Chickpea flour, methi leaves, jowar flour, whe... | Vegetarian | 15 | 30 | Spicy | Snack | Gujarat | West |
| 239 | Koldil Chicken | Banana flower, chicken, green chili, mustard o... | Non-Vegetarian | 20 | 30 | Spicy | Main Course | Assam | North East |
| 36 | Adhirasam | Rice flour, jaggery, ghee, vegetable oil, elachi | Vegetarian | 10 | 50 | Sweet | Dessert | West Bengal | East |
| 28 | Mihidana | Besan flour, sugar, ghee | Vegetarian | 15 | 30 | Sweet | Dessert | West Bengal | East |
In [7]:
# List the column labels of the dataset.
Data.columns
Out[7]:
Index(['Name', 'Ingredients', 'Diet', 'Preparation_Time', 'Cooking_Time',
'Flavor', 'Course_Name', 'State', 'Region'],
dtype='object')
Checking the Dataset.¶
In [9]:
# Print a structural summary of the dataset: number of entries, each column's
# non-null count and dtype, and the memory usage.
Data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 255 entries, 0 to 254 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Name 255 non-null object 1 Ingredients 255 non-null object 2 Diet 255 non-null object 3 Preparation_Time 255 non-null int64 4 Cooking_Time 255 non-null int64 5 Flavor 255 non-null object 6 Course_Name 255 non-null object 7 State 255 non-null object 8 Region 255 non-null object dtypes: int64(2), object(7) memory usage: 18.1+ KB
In [10]:
# Dimensions of the dataset as a (rows, columns) tuple.
Data.shape
Out[10]:
(255, 9)
In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
# Preparation_Time and Cooking_Time.
Data.describe()
Out[11]:
| Preparation_Time | Cooking_Time | |
|---|---|---|
| count | 255.000000 | 255.000000 |
| mean | 33.576471 | 38.992157 |
| std | 71.769323 | 46.679759 |
| min | 5.000000 | 2.000000 |
| 25% | 10.000000 | 25.000000 |
| 50% | 15.000000 | 30.000000 |
| 75% | 20.000000 | 45.000000 |
| max | 500.000000 | 720.000000 |
In [12]:
# Count the missing values in every column (isna is the same check as isnull).
Data.isna().sum()
Out[12]:
Name 0 Ingredients 0 Diet 0 Preparation_Time 0 Cooking_Time 0 Flavor 0 Course_Name 0 State 0 Region 0 dtype: int64
We have no Null Values.
Analysing and Visualizing the Dataset.¶
1)(i) To get the count of Vegetarian and Non-Vegetarian dishes.
In [16]:
# Number of dishes per diet type, returned as a two-column frame.
Veg_NonVeg = Data['Diet'].value_counts().reset_index()
Veg_NonVeg
Out[16]:
| Diet | count | |
|---|---|---|
| 0 | Vegetarian | 226 |
| 1 | Non-Vegetarian | 29 |
1)(ii) To Plot the Graph.
In [18]:
# Pie chart of the vegetarian / non-vegetarian split.
# The columns are relabelled so the chart can refer to them by fixed names.
Veg_NonVeg.columns = ['Diet', 'Count']
fig = px.pie(
    Veg_NonVeg,
    names='Diet',
    values='Count',
    title='Proportion of Vegetarian and Non-Vegetarian Dishes',
    color_discrete_sequence=['red', 'black'],
)
fig.update_layout(height=600, width=700)
fig.show()
2)(i) To get the List of all Sweet Dishes.
In [20]:
# Keep only the dishes whose flavour is 'Sweet'.
Sweet_Data = Data.loc[Data['Flavor'].eq('Sweet')]
Sweet_Data
Out[20]:
| Name | Ingredients | Diet | Preparation_Time | Cooking_Time | Flavor | Course_Name | State | Region | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Balu shahi | Maida flour, yogurt, oil, sugar | Vegetarian | 45 | 25 | Sweet | Dessert | West Bengal | East |
| 1 | Boondi | Gram flour, ghee, sugar | Vegetarian | 80 | 30 | Sweet | Dessert | Rajasthan | West |
| 2 | Gajar ka halwa | Carrots, milk, sugar, ghee, cashews, raisins | Vegetarian | 15 | 60 | Sweet | Dessert | Punjab | North |
| 3 | Ghevar | Flour, ghee, kewra, milk, clarified butter, su... | Vegetarian | 15 | 30 | Sweet | Dessert | Rajasthan | West |
| 4 | Gulab jamun | Milk powder, plain flour, baking powder, ghee,... | Vegetarian | 15 | 40 | Sweet | Dessert | West Bengal | East |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 250 | Til Pitha | Glutinous rice, black sesame seeds, gur | Vegetarian | 5 | 30 | Sweet | Dessert | Assam | North East |
| 251 | Bebinca | Coconut milk, egg yolks, clarified butter, all... | Vegetarian | 20 | 60 | Sweet | Dessert | Goa | West |
| 252 | Shufta | Cottage cheese, dry dates, dried rose petals, ... | Vegetarian | 20 | 45 | Sweet | Dessert | Jammu & Kashmir | North |
| 253 | Mawa Bati | Milk powder, dry fruits, arrowroot powder, all... | Vegetarian | 20 | 45 | Sweet | Dessert | Madhya Pradesh | Central |
| 254 | Pinaca | Brown rice, fennel seeds, grated coconut, blac... | Vegetarian | 20 | 45 | Sweet | Dessert | Goa | West |
90 rows × 9 columns
2)(ii) To get the List of Sweets other than Desserts.
In [22]:
# Sweet dishes whose course is anything other than 'Dessert'.
# Note: despite its name, this frame holds the NON-dessert sweets.
Desert_Sweet_Data = Sweet_Data.loc[Sweet_Data['Course_Name'].ne('Dessert')]
Desert_Sweet_Data
Out[22]:
| Name | Ingredients | Diet | Preparation_Time | Cooking_Time | Flavor | Course_Name | State | Region | |
|---|---|---|---|---|---|---|---|---|---|
| 46 | Obbattu holige | Maida flour, turmeric, coconut, chickpeas, jag... | Vegetarian | 180 | 60 | Sweet | Main Course | Karnataka | South |
| 85 | Dal makhani | Red kidney beans, urad dal, cream, garam masal... | Vegetarian | 10 | 60 | Sweet | Main Course | Punjab | North |
| 155 | Puttu | Brown rice flour, sugar, grated coconut | Vegetarian | 495 | 40 | Sweet | Main Course | Kerala | South |
| 176 | Copra paak | Condensed milk, nestle cream, coconut ice, red... | Vegetarian | 20 | 30 | Sweet | Main Course | Gujarat | West |
| 243 | Mishti Chholar Dal | Chana dal, fresh coconut, ginger, cinnamon, ra... | Vegetarian | 10 | 30 | Sweet | Main Course | West Bengal | East |
3)(i) To get the List of all the Flavours.
In [24]:
# Number of dishes for each flavour, most common first.
Flavor_Data = Data['Flavor'].value_counts().reset_index()
Flavor_Data
Out[24]:
| Flavor | count | |
|---|---|---|
| 0 | Spicy | 143 |
| 1 | Sweet | 90 |
| 2 | Normal | 13 |
| 3 | Bitter | 6 |
| 4 | Sour | 3 |
3)(ii) To Plot the Graph.
In [26]:
# Bar chart of the number of dishes per flavour.
# The counts are recomputed here with explicit column names ('Flavour', 'Food')
# that the plot refers to.
Flavor_Data = (
    Data['Flavor']
    .value_counts()
    .rename_axis('Flavour')
    .reset_index(name='Food')
)

plt.figure(figsize=(8, 5))
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.title("No of Foods according to the Flavours", fontsize=30)
Graph = sns.barplot(x="Flavour", y="Food", data=Flavor_Data, hue="Flavour",
                    palette="flare", dodge=False, edgecolor='black')
for container in Graph.containers:
    Graph.bar_label(container, fontsize=10)  # value label on each bar
# Set the final axis labels (and their font sizes) once, on the drawn axes.
Graph.set_xlabel('Flavor', fontsize=25)
Graph.set_ylabel('No of Foods', fontsize=20)
# plt.show() accepts only the keyword argument `block`; the Axes must not be passed to it.
plt.show()
4)(i) To get the List of Top 10 Foods with LOW Cooking Time.
In [28]:
# Ten dishes with the shortest cooking time: sort ascending, then take the first 10 rows.
Cooking_Time = (
    Data[['Cooking_Time', 'Name', 'Flavor']]
    .sort_values(by=['Cooking_Time'], ascending=True)
)
Top_10 = Cooking_Time.head(10)
Top_10
Out[28]:
| Cooking_Time | Name | Flavor | |
|---|---|---|---|
| 109 | 2 | Pani puri | Spicy |
| 11 | 5 | Lassi | Sweet |
| 147 | 5 | Papadum | Spicy |
| 111 | 5 | Papad | Spicy |
| 212 | 6 | Lilva Kachori | Spicy |
| 78 | 10 | Chapati | Normal |
| 190 | 10 | Keri no ras | Sour |
| 169 | 10 | Bajri no rotlo | Spicy |
| 195 | 10 | Koshimbir | Spicy |
| 214 | 10 | Khichu | Spicy |
4)(ii) To Plot the Graph.
In [30]:
# Bar chart of the ten dishes with the LOWEST cooking time.
# Reads `Top_10` as built by the ascending sort in the cell directly above; run that cell first.
plt.figure(figsize=(12, 5))
# The title names which end of the ranking is shown, so this chart cannot be
# confused with the highest-cooking-time chart.
plt.title('Top 10 Dishes with the Lowest Cooking Time', fontsize=25)
plt.xticks(fontsize=9, rotation=45, ha='right')
plt.yticks(fontsize=12)
plt.ylabel('Cooking Time in Minutes', fontsize=15)
plt.xlabel('Name of the Dish', fontsize=15)
Graph = sns.barplot(y='Cooking_Time', x='Name', data=Top_10, palette='Set2',
                    hue='Name', dodge=False, edgecolor='black')
for container in Graph.containers:
    Graph.bar_label(container, fontsize=10)  # value label on each bar
plt.show()
5)(i) To get the List of Top 10 Foods with HIGH Cooking Time.
In [32]:
# Ten dishes with the longest cooking time: sort descending, then take the first 10 rows.
Cooking_Time = (
    Data[['Cooking_Time', 'Name', 'Flavor']]
    .sort_values(by=['Cooking_Time'], ascending=False)
)
Top_10 = Cooking_Time.head(10)
Top_10
Out[32]:
| Cooking_Time | Name | Flavor | |
|---|---|---|---|
| 62 | 720 | Shrikhand | Sweet |
| 27 | 120 | Malapua | Sweet |
| 114 | 120 | Pindi chana | Spicy |
| 75 | 120 | Biryani | Spicy |
| 115 | 90 | Rajma chaval | Spicy |
| 128 | 90 | Dosa | Spicy |
| 142 | 90 | Kuzhakkattai | Spicy |
| 144 | 90 | Masala Dosa | Spicy |
| 83 | 90 | Daal baati churma | Spicy |
| 130 | 90 | Idli | Spicy |
5)(ii) To Plot the Graph.
In [34]:
# Bar chart of the ten dishes with the HIGHEST cooking time.
# Reads `Top_10` as built by the descending sort in the cell directly above; run that cell first.
plt.figure(figsize=(12, 5))
# The title names which end of the ranking is shown, so this chart cannot be
# confused with the lowest-cooking-time chart.
plt.title('Top 10 Dishes with the Highest Cooking Time', fontsize=25)
plt.xticks(fontsize=9, rotation=45, ha='right')
plt.yticks(fontsize=12)
plt.ylabel('Cooking Time in Minutes', fontsize=15)
plt.xlabel('Name of the Dish', fontsize=15)
Graph = sns.barplot(y='Cooking_Time', x='Name', data=Top_10, palette='autumn',
                    hue='Name', dodge=False, edgecolor='black')
for container in Graph.containers:
    Graph.bar_label(container, fontsize=10)  # value label on each bar
plt.show()
6)(i) To get the number of Foods in each Region of India.
In [36]:
# Number of dishes per region, most common first.
Regions = Data['Region'].value_counts().reset_index()
Regions
Out[36]:
| Region | count | |
|---|---|---|
| 0 | West | 74 |
| 1 | South | 59 |
| 2 | North | 53 |
| 3 | North East | 35 |
| 4 | East | 31 |
| 5 | Central | 3 |
6)(ii) To Plot the Graph.
In [38]:
# Pie chart of each region's share of the dishes.
# The columns are relabelled so the chart can refer to them by fixed names.
Regions.columns = ['Region', 'count']
plt.figure(figsize=(10, 8))
plt.pie(
    Regions['count'],
    labels=Regions['Region'],
    colors=sns.color_palette('Set2'),
    autopct='%1.1f%%',              # percentage shown on each wedge, one decimal place
    textprops=dict(fontsize=12),
)
plt.title('Proportion of Dishes in Different Regions', fontsize=18)
plt.show()
7)(i) To get the number of Foods in each State of India.
In [40]:
# Number of dishes per state, most common first.
States = Data['State'].value_counts().reset_index()
States
Out[40]:
| State | count | |
|---|---|---|
| 0 | Gujarat | 35 |
| 1 | Punjab | 32 |
| 2 | Maharashtra | 30 |
| 3 | West Bengal | 24 |
| 4 | Assam | 21 |
| 5 | Tamil Nadu | 20 |
| 6 | Meghalaya | 12 |
| 7 | Himachal Pradesh | 12 |
| 8 | Andhra Pradesh | 10 |
| 9 | Uttar Pradesh | 9 |
| 10 | Kerala | 8 |
| 11 | Odisha | 7 |
| 12 | Karnataka | 6 |
| 13 | Rajasthan | 6 |
| 14 | Telangana | 5 |
| 15 | Bihar | 3 |
| 16 | Goa | 3 |
| 17 | Manipur | 2 |
| 18 | Jammu & Kashmir | 2 |
| 19 | Madhya Pradesh | 2 |
| 20 | Uttarakhand | 1 |
| 21 | Tripura | 1 |
| 22 | Nagaland | 1 |
| 23 | Delhi | 1 |
| 24 | Chhattisgarh | 1 |
| 25 | Haryana | 1 |
7)(ii) To Plot the Graph.
In [42]:
# Bar chart of the number of dishes recorded for each state.
# Reads the `States` frame (columns 'State' and 'count') built in the cell above.
plt.figure(figsize=(12, 5))
plt.title('No of Dishes in Different States', fontsize=25)
plt.xticks(fontsize=9, rotation=45, ha='right')
plt.yticks(fontsize=12)
plt.ylabel('No of Dishes', fontsize=15)
plt.xlabel('States', fontsize=15)
Graph = sns.barplot(
    data=States,
    x='State',
    y='count',
    hue='State',
    palette='PuRd_r',
    dodge=False,
    edgecolor='black',
)
# Write the count on top of every bar.
for container in Graph.containers:
    Graph.bar_label(container, fontsize=10)
plt.show()
In [ ]: